package com.fortune.conf;

import us.codecraft.webmagic.*;
import us.codecraft.webmagic.processor.PageProcessor;

public class SOHUPageNewsSpider implements PageProcessor {

    // 抓取网站的相关配置，包括编码、抓取间隔、重试次数等
    private Site site = Site.me().setRetryTimes(3).setSleepTime(100);

    public Site getSite() {
        return site;
    }

    public void process(Page page) {
        page.putField("title", page.getHtml().xpath("//*[@id=\"article-container\"]/div[2]/div[1]/div/div[1]/h1").toString());
        page.putField("contentImages",page.getHtml().xpath("//*[@id=\"mp-editor\"]/p/img/@data-src").all());
        page.putField("content",page.getHtml().xpath("//*[@id=\"mp-editor\"]").all());

//        }
//        // 列表页
//        else {
//            // 文章url
//            page.addTargetRequests(
//                    page.getHtml().xpath("//*[@id=\"__next\"]/div/section/div/main/a/@href").all());
//            // 翻页url
//            page.addTargetRequests(
//                    page.getHtml().xpath("/html/body/div[3]/div[1]/div[3]/a[@class='page-btn-prev']/@href").all());
//        }
    }

    public static void main(String[] args) {
        String url =
                "https://www.sohu.com/a/876503476_120684531?scm=10004.51220_15-300008.0.0.1101.topic:51220:5.0.9.a2_3X63&spm=smpc.channel_199.block3_43_0TRUdj_1_fd.3.17431480051554AuzolI_392";
        Spider.create(new SOHUPageNewsSpider()).addUrl(url).addPipeline(new SavePipelineDate())
                .thread(3).run();
    }
}
